{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "56d509d8",
   "metadata": {},
   "source": [
    "使用最新的 hadoop.dll，下载地址：\n",
    "https://github.com/cdarlint/winutils"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "13678751",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Install PySpark first if it is missing: pip install pyspark\n",
    "from pyspark.sql import SparkSession\n",
    "\n",
    "# Reuse the active Spark session if there is one; otherwise start one named \"Demo\".\n",
    "spark = SparkSession.builder.appName(\"Demo\").getOrCreate()\n",
    "\n",
    "# Input CSV file and output directory (raw strings keep the Windows backslashes literal).\n",
    "inpath = r'D:\\develop\\var\\data\\cloudcost\\cloudcost-202209-cost.csv'\n",
    "outpath = r'D:\\develop\\var\\data\\cloudcost\\out'\n",
    "\n",
    "# Load the CSV, using its first row as the column names.\n",
    "df = spark.read.csv(inpath, header=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "fb24d5ec",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Write df as CSV under outpath with a header row, overwriting any existing output there.\n",
    "df.write.csv(outpath, mode='overwrite', header=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8d1f09a4",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "python",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
